Fix deadlock in XendDomainInfo when a domain is cleaned up. We are renaming
author emellor@ewan <emellor@ewan>
Wed, 12 Oct 2005 09:11:35 +0000 (10:11 +0100)
committer emellor@ewan <emellor@ewan>
Wed, 12 Oct 2005 09:11:35 +0000 (10:11 +0100)
the domain, to make it clear that it is a zombie, but this renaming cannot
check the uniqueness of the new name, because this causes a deadlock with
XendDomain.  Instead, we allow the name to be non-unique for the case of
zombie domains.

Change the locking in waitForShutdown and state_set to be robust in the face of
exceptions.

Rename the STATE_VM_ constants to STATE_DOM_.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
tools/python/xen/xend/XendDomain.py
tools/python/xen/xend/XendDomainInfo.py

index 7809c80fd9036f5c86799614d6b8802415785cd2..7b1ea4a75915d78ea3390ad383d40e91b91c90e1 100644 (file)
@@ -57,7 +57,7 @@ class XendDomain:
         # So we stuff the XendDomain instance (self) into xroot's components.
         xroot.add_component("xen.xend.XendDomain", self)
         self.domains = {}
-        self.domains_lock = threading.Condition()
+        self.domains_lock = threading.RLock()
         self.watchReleaseDomain()
 
         self.domains_lock.acquire()
@@ -318,7 +318,7 @@ class XendDomain:
             n = len(matching)
             if n == 1:
                 return matching[0]
-            elif n > 1:
+            elif n > 1 and not d.isTerminated():
                 log.error('Name uniqueness has been violated for name %s!  '
                           'Recovering by renaming:', name)
                 for d in matching:
index cc949d15e031666b64719b862a3aba6cd2e1e56b..3d008c5d3f46bc86b266694612bb9bc684a74554 100644 (file)
@@ -78,8 +78,8 @@ restart_modes = [
     "rename-restart"
     ]
 
-STATE_VM_OK         = "ok"
-STATE_VM_TERMINATED = "terminated"
+STATE_DOM_OK       = 1
+STATE_DOM_SHUTDOWN = 2
 
 """Flag for a block device backend domain."""
 SIF_BLK_BE_DOMAIN = (1<<4)
@@ -293,7 +293,7 @@ def parseConfig(config):
     restart = get_cfg('restart')
     if restart:
         def handle_restart(event, val):
-            if not event in result:
+            if result[event] is None:
                 result[event] = val
 
         if restart == "onreboot":
@@ -384,7 +384,7 @@ class XendDomainInfo:
         self.console_channel = None
         self.console_mfn = None
 
-        self.state = STATE_VM_OK
+        self.state = STATE_DOM_OK
         self.state_updated = threading.Condition()
         self.refresh_shutdown_lock = threading.Condition()
 
@@ -708,7 +708,7 @@ class XendDomainInfo:
                     self.clearRestart()
 
                     if reason == 'suspend':
-                        self.state_set(STATE_VM_TERMINATED)
+                        self.state_set(STATE_DOM_SHUTDOWN)
                         # Don't destroy the domain.  XendCheckpoint will do
                         # this once it has finished.
                     elif reason in ['poweroff', 'reboot']:
@@ -821,19 +821,31 @@ class XendDomainInfo:
 
     def state_set(self, state):
         self.state_updated.acquire()
-        if self.state != state:
-            self.state = state
-            self.state_updated.notifyAll()
-        self.state_updated.release()
+        try:
+            if self.state != state:
+                self.state = state
+                self.state_updated.notifyAll()
+        finally:
+            self.state_updated.release()
 
 
     ## public:
 
     def waitForShutdown(self):
         self.state_updated.acquire()
-        while self.state == STATE_VM_OK:
-            self.state_updated.wait()
-        self.state_updated.release()
+        try:
+            while self.state == STATE_DOM_OK:
+                self.state_updated.wait()
+        finally:
+            self.state_updated.release()
+
+
+    def isShutdown(self):
+        self.state_updated.acquire()
+        try:
+            return self.state == STATE_DOM_SHUTDOWN
+        finally:
+            self.state_updated.release()
 
 
     def __str__(self):
@@ -1065,11 +1077,11 @@ class XendDomainInfo:
 
         try:
             if not self.info['name'].startswith(ZOMBIE_PREFIX):
-                self.info['name'] = self.generateZombieName()
+                self.info['name'] = ZOMBIE_PREFIX + self.info['name']
         except:
             log.exception("Renaming Zombie failed.")
 
-        self.state_set(STATE_VM_TERMINATED)
+        self.state_set(STATE_DOM_SHUTDOWN)
 
 
     def cleanupVm(self):
@@ -1274,7 +1286,7 @@ class XendDomainInfo:
         log.info("Preserving dead domain %s (%d).", self.info['name'],
                  self.domid)
         self.storeDom('xend/shutdown_completed', 'True')
-        self.state_set(STATE_VM_TERMINATED)
+        self.state_set(STATE_DOM_SHUTDOWN)
 
 
     ## public:
@@ -1304,18 +1316,6 @@ class XendDomainInfo:
                 n += 1
 
 
-    def generateZombieName(self):
-        n = 0
-        name = ZOMBIE_PREFIX + self.info['name']
-        while True:
-            try:
-                self.check_name(name)
-                return name
-            except VmError:
-                n += 1
-                name = "%s%d-%s" % (ZOMBIE_PREFIX, n, self.info['name'])
-
-
     def configure_bootloader(self):
         if not self.info['bootloader']:
             return